/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */

/**
 * This file is an adaptation of spark's spark/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4 grammar.
 * Reference: https://github.com/apache/spark/blob/v3.5.0/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4
 */

// $antlr-format alignTrailingComments true, columnLimit 150, maxEmptyLinesToKeep 1, reflowComments false, useTab false
// $antlr-format allowShortRulesOnASingleLine true, allowShortBlocksOnASingleLine true, minEmptyLines 0, alignSemicolons none, alignColons trailing
// $antlr-format singleLineOverrulesHangingColon true, alignLexerCommands true, alignLabels true, alignTrailers true
// $antlr-format spaceBeforeAssignmentOperators false, groupedAlignments true

lexer grammar SparkSqlLexer;

// Match every literal and character set in this grammar regardless of letter case,
// so a rule written as 'SELECT' also accepts 'select' or 'Select'.
options {
    caseInsensitive= true;
}

@members {
  /**
   * Becomes true once the lexer has reached the end of the input while still inside a
   * bracketed comment. Code that drives this lexer is expected to inspect the flag and
   * report the unclosed comment as a parse error.
   */
  public has_unclosed_bracketed_comment: boolean = false;

  /**
   * Records that a bracketed comment was never closed.
   * The BRACKETED_COMMENT rule invokes this from its action when it hits the end of the
   * character stream before finding the closing delimiter; the flag is only set here and
   * the failure is raised later by whoever checks it.
   */
  public markUnclosedComment(): void {
    this.has_unclosed_bracketed_comment = true;
  }
}

// Single-character punctuation tokens.
SEMICOLON: ';';

// Grouping, list and member-access punctuation.
LEFT_PAREN    : '(';
RIGHT_PAREN   : ')';
COMMA         : ',';
DOT           : '.';
LEFT_BRACKET  : '[';
RIGHT_BRACKET : ']';

// NOTE (carried over from the upstream Spark grammar, where keyword tokens have no `KW_` prefix):
// If you add a new token in the list below, you should update the list of keywords
// and reserved tag in `docs/sql-ref-ansi-compliance.md#sql-keywords`, and
// modify `ParserUtils.toExprAlias()` which assumes all keywords are between `ADD` and `ZONE`.

//============================
// Start of the keywords list
//============================
// Every keyword token is named `KW_<WORD>` and, because of the `caseInsensitive` option,
// matches its literal in any letter case. All keywords are declared before IDENTIFIER, so a
// word that is both a keyword and a valid identifier is emitted as the keyword token.
// Three entries do not follow the "name equals literal" pattern:
//   KW_BINARY_HEX matches the single letter 'X',
//   KW_PERCENTLIT matches the word 'PERCENT' (the '%' symbol is the separate PERCENT token),
//   KW_MINUS      matches the word 'MINUS'   (the '-' symbol is the separate MINUS token).
//--SPARK-KEYWORD-LIST-START
KW_ADD               : 'ADD';
KW_AFTER             : 'AFTER';
KW_ALL               : 'ALL';
KW_ALTER             : 'ALTER';
KW_ALWAYS            : 'ALWAYS';
KW_ANALYZE           : 'ANALYZE';
KW_AND               : 'AND';
KW_ANTI              : 'ANTI';
KW_ANY               : 'ANY';
KW_ANY_VALUE         : 'ANY_VALUE';
KW_ARCHIVE           : 'ARCHIVE';
KW_ARRAY             : 'ARRAY';
KW_AS                : 'AS';
KW_ASC               : 'ASC';
KW_AT                : 'AT';
KW_AUTHORIZATION     : 'AUTHORIZATION';
KW_BETWEEN           : 'BETWEEN';
KW_BIGINT            : 'BIGINT';
KW_BINARY            : 'BINARY';
KW_BOOLEAN           : 'BOOLEAN';
KW_BOTH              : 'BOTH';
KW_BUCKET            : 'BUCKET';
KW_BUCKETS           : 'BUCKETS';
KW_BY                : 'BY';
KW_BYTE              : 'BYTE';
KW_CACHE             : 'CACHE';
KW_CASCADE           : 'CASCADE';
KW_CASE              : 'CASE';
KW_CAST              : 'CAST';
KW_CATALOG           : 'CATALOG';
KW_CATALOGS          : 'CATALOGS';
KW_CHANGE            : 'CHANGE';
KW_CHAR              : 'CHAR';
KW_CHARACTER         : 'CHARACTER';
KW_CHECK             : 'CHECK';
KW_CLEAR             : 'CLEAR';
KW_CLUSTER           : 'CLUSTER';
KW_CLUSTERED         : 'CLUSTERED';
KW_CODEGEN           : 'CODEGEN';
KW_COLLATE           : 'COLLATE';
KW_COLLECTION        : 'COLLECTION';
KW_COLUMN            : 'COLUMN';
KW_COLUMNS           : 'COLUMNS';
KW_COMMENT           : 'COMMENT';
KW_COMMIT            : 'COMMIT';
KW_COMPACT           : 'COMPACT';
KW_COMPACTIONS       : 'COMPACTIONS';
KW_COMPUTE           : 'COMPUTE';
KW_CONCATENATE       : 'CONCATENATE';
KW_CONSTRAINT        : 'CONSTRAINT';
KW_COST              : 'COST';
KW_CREATE            : 'CREATE';
KW_CROSS             : 'CROSS';
KW_CUBE              : 'CUBE';
KW_CURRENT           : 'CURRENT';
KW_CURRENT_DATE      : 'CURRENT_DATE';
KW_CURRENT_TIME      : 'CURRENT_TIME';
KW_CURRENT_TIMESTAMP : 'CURRENT_TIMESTAMP';
KW_CURRENT_USER      : 'CURRENT_USER';
KW_DAY               : 'DAY';
KW_DAYS              : 'DAYS';
KW_DAYOFYEAR         : 'DAYOFYEAR';
KW_DATA              : 'DATA';
KW_DATE              : 'DATE';
KW_DATABASE          : 'DATABASE';
KW_DATABASES         : 'DATABASES';
KW_DATEADD           : 'DATEADD';
KW_DATE_ADD          : 'DATE_ADD';
KW_DATEDIFF          : 'DATEDIFF';
KW_DATE_DIFF         : 'DATE_DIFF';
KW_DBPROPERTIES      : 'DBPROPERTIES';
KW_DEC               : 'DEC';
KW_DECIMAL           : 'DECIMAL';
KW_DECLARE           : 'DECLARE';
KW_DEFAULT           : 'DEFAULT';
KW_DEFINED           : 'DEFINED';
KW_DELETE            : 'DELETE';
KW_DELIMITED         : 'DELIMITED';
KW_DESC              : 'DESC';
KW_DESCRIBE          : 'DESCRIBE';
KW_DFS               : 'DFS';
KW_DIRECTORIES       : 'DIRECTORIES';
KW_DIRECTORY         : 'DIRECTORY';
KW_DISABLE           : 'DISABLE';
KW_DISTINCT          : 'DISTINCT';
KW_DISTRIBUTE        : 'DISTRIBUTE';
KW_DIV               : 'DIV';
KW_DOUBLE            : 'DOUBLE';
KW_DROP              : 'DROP';
KW_ELSE              : 'ELSE';
KW_ENABLE            : 'ENABLE';
KW_END               : 'END';
KW_ESCAPE            : 'ESCAPE';
KW_ESCAPED           : 'ESCAPED';
KW_EXCEPT            : 'EXCEPT';
KW_EXCHANGE          : 'EXCHANGE';
KW_EXCLUDE           : 'EXCLUDE';
KW_EXISTS            : 'EXISTS';
KW_EXPLAIN           : 'EXPLAIN';
KW_EXPORT            : 'EXPORT';
KW_EXTENDED          : 'EXTENDED';
KW_EXTERNAL          : 'EXTERNAL';
KW_EXTRACT           : 'EXTRACT';
KW_FALSE             : 'FALSE';
KW_FETCH             : 'FETCH';
KW_FIELDS            : 'FIELDS';
KW_FILTER            : 'FILTER';
KW_FILEFORMAT        : 'FILEFORMAT';
KW_FIRST             : 'FIRST';
KW_FLOAT             : 'FLOAT';
KW_FOLLOWING         : 'FOLLOWING';
KW_FOR               : 'FOR';
KW_FOREIGN           : 'FOREIGN';
KW_FORMAT            : 'FORMAT';
KW_FORMATTED         : 'FORMATTED';
KW_FROM              : 'FROM';
KW_FULL              : 'FULL';
KW_FUNCTION          : 'FUNCTION';
KW_FUNCTIONS         : 'FUNCTIONS';
KW_GENERATED         : 'GENERATED';
KW_GLOBAL            : 'GLOBAL';
KW_GRANT             : 'GRANT';
KW_GROUP             : 'GROUP';
KW_GROUPING          : 'GROUPING';
KW_HAVING            : 'HAVING';
KW_BINARY_HEX        : 'X';
KW_HOUR              : 'HOUR';
KW_HOURS             : 'HOURS';
KW_IDENTIFIER        : 'IDENTIFIER';
KW_IF                : 'IF';
KW_IGNORE            : 'IGNORE';
KW_IMPORT            : 'IMPORT';
KW_IN                : 'IN';
KW_INCLUDE           : 'INCLUDE';
KW_INDEX             : 'INDEX';
KW_INDEXES           : 'INDEXES';
KW_INNER             : 'INNER';
KW_INPATH            : 'INPATH';
KW_INPUTFORMAT       : 'INPUTFORMAT';
KW_INSERT            : 'INSERT';
KW_INTERSECT         : 'INTERSECT';
KW_INTERVAL          : 'INTERVAL';
KW_INT               : 'INT';
KW_INTEGER           : 'INTEGER';
KW_INTO              : 'INTO';
KW_IS                : 'IS';
KW_ITEMS             : 'ITEMS';
KW_JOIN              : 'JOIN';
KW_KEYS              : 'KEYS';
KW_LAST              : 'LAST';
KW_LATERAL           : 'LATERAL';
KW_LAZY              : 'LAZY';
KW_LEADING           : 'LEADING';
KW_LEFT              : 'LEFT';
KW_LIFECYCLE         : 'LIFECYCLE';
KW_LIKE              : 'LIKE';
KW_ILIKE             : 'ILIKE';
KW_LIMIT             : 'LIMIT';
KW_LINES             : 'LINES';
KW_LIST              : 'LIST';
KW_LOAD              : 'LOAD';
KW_LOCAL             : 'LOCAL';
KW_LOCATION          : 'LOCATION';
KW_LOCK              : 'LOCK';
KW_LOCKS             : 'LOCKS';
KW_LOGICAL           : 'LOGICAL';
KW_LONG              : 'LONG';
KW_MACRO             : 'MACRO';
KW_MATERIALIZED      : 'MATERIALIZED';
KW_MAP               : 'MAP';
KW_MATCHED           : 'MATCHED';
KW_MERGE             : 'MERGE';
KW_MICROSECOND       : 'MICROSECOND';
KW_MICROSECONDS      : 'MICROSECONDS';
KW_MILLISECOND       : 'MILLISECOND';
KW_MILLISECONDS      : 'MILLISECONDS';
KW_MINUTE            : 'MINUTE';
KW_MINUTES           : 'MINUTES';
KW_MONTH             : 'MONTH';
KW_MONTHS            : 'MONTHS';
KW_MSCK              : 'MSCK';
KW_NAME              : 'NAME';
KW_NAMESPACE         : 'NAMESPACE';
KW_NAMESPACES        : 'NAMESPACES';
KW_NANOSECOND        : 'NANOSECOND';
KW_NANOSECONDS       : 'NANOSECONDS';
KW_NATURAL           : 'NATURAL';
KW_NO                : 'NO';
KW_NOSCAN            : 'NOSCAN';
KW_NOT               : 'NOT';
KW_NULL              : 'NULL';
KW_NULLS             : 'NULLS';
KW_NUMERIC           : 'NUMERIC';
KW_OF                : 'OF';
KW_OFFSET            : 'OFFSET';
KW_ON                : 'ON';
KW_ONLY              : 'ONLY';
KW_OPTIMIZE          : 'OPTIMIZE';
KW_OPTION            : 'OPTION';
KW_OPTIONS           : 'OPTIONS';
KW_OR                : 'OR';
KW_ORDER             : 'ORDER';
KW_OUT               : 'OUT';
KW_OUTER             : 'OUTER';
KW_OUTPUTFORMAT      : 'OUTPUTFORMAT';
KW_OVER              : 'OVER';
KW_OVERLAPS          : 'OVERLAPS';
KW_OVERLAY           : 'OVERLAY';
KW_OVERWRITE         : 'OVERWRITE';
KW_PARTITION         : 'PARTITION';
KW_PARTITIONED       : 'PARTITIONED';
KW_PARTITIONS        : 'PARTITIONS';
KW_PERCENTILE_CONT   : 'PERCENTILE_CONT';
KW_PERCENTILE_DISC   : 'PERCENTILE_DISC';
KW_PERCENTLIT        : 'PERCENT';
KW_PIVOT             : 'PIVOT';
KW_PLACING           : 'PLACING';
KW_POSITION          : 'POSITION';
KW_PRECEDING         : 'PRECEDING';
KW_PRIMARY           : 'PRIMARY';
KW_PRINCIPALS        : 'PRINCIPALS';
KW_PROPERTIES        : 'PROPERTIES';
KW_PURGE             : 'PURGE';
KW_QUARTER           : 'QUARTER';
KW_QUERY             : 'QUERY';
KW_RANGE             : 'RANGE';
KW_REAL              : 'REAL';
KW_RECORDREADER      : 'RECORDREADER';
KW_RECORDWRITER      : 'RECORDWRITER';
KW_RECOVER           : 'RECOVER';
KW_REDUCE            : 'REDUCE';
KW_REFERENCES        : 'REFERENCES';
KW_REFRESH           : 'REFRESH';
KW_RENAME            : 'RENAME';
KW_REPAIR            : 'REPAIR';
KW_REPEATABLE        : 'REPEATABLE';
KW_REPLACE           : 'REPLACE';
KW_RESET             : 'RESET';
KW_RESPECT           : 'RESPECT';
KW_RESTRICT          : 'RESTRICT';
KW_REWRITE           : 'REWRITE';
KW_REVOKE            : 'REVOKE';
KW_RIGHT             : 'RIGHT';
KW_RLIKE             : 'RLIKE';
KW_REGEXP            : 'REGEXP';
KW_ROLE              : 'ROLE';
KW_ROLES             : 'ROLES';
KW_ROLLBACK          : 'ROLLBACK';
KW_ROLLUP            : 'ROLLUP';
KW_ROW               : 'ROW';
KW_ROWS              : 'ROWS';
KW_SECOND            : 'SECOND';
KW_SECONDS           : 'SECONDS';
KW_SCHEMA            : 'SCHEMA';
KW_SCHEMAS           : 'SCHEMAS';
KW_SELECT            : 'SELECT';
KW_SEMI              : 'SEMI';
KW_SEPARATED         : 'SEPARATED';
KW_SERDE             : 'SERDE';
KW_SERDEPROPERTIES   : 'SERDEPROPERTIES';
KW_SESSION_USER      : 'SESSION_USER';
KW_SET               : 'SET';
KW_MINUS             : 'MINUS';
KW_SETS              : 'SETS';
KW_SHORT             : 'SHORT';
KW_SHOW              : 'SHOW';
KW_SINGLE            : 'SINGLE';
KW_SKEWED            : 'SKEWED';
KW_SMALLINT          : 'SMALLINT';
KW_SOME              : 'SOME';
KW_SORT              : 'SORT';
KW_SORTED            : 'SORTED';
KW_SOURCE            : 'SOURCE';
KW_START             : 'START';
KW_STATISTICS        : 'STATISTICS';
KW_STORED            : 'STORED';
KW_STRATIFY          : 'STRATIFY';
KW_STRING            : 'STRING';
KW_STRUCT            : 'STRUCT';
KW_SUBSTR            : 'SUBSTR';
KW_SUBSTRING         : 'SUBSTRING';
KW_SYNC              : 'SYNC';
KW_SYSTEM            : 'SYSTEM';
KW_SYSTEM_TIME       : 'SYSTEM_TIME';
KW_SYSTEM_VERSION    : 'SYSTEM_VERSION';
KW_TABLE             : 'TABLE';
KW_TABLES            : 'TABLES';
KW_TABLESAMPLE       : 'TABLESAMPLE';
KW_TARGET            : 'TARGET';
KW_TBLPROPERTIES     : 'TBLPROPERTIES';
KW_TEMPORARY         : 'TEMPORARY';
KW_TERMINATED        : 'TERMINATED';
KW_THEN              : 'THEN';
KW_TIME              : 'TIME';
KW_TIMEDIFF          : 'TIMEDIFF';
KW_TIMESTAMP         : 'TIMESTAMP';
KW_TIMESTAMP_LTZ     : 'TIMESTAMP_LTZ';
KW_TIMESTAMP_NTZ     : 'TIMESTAMP_NTZ';
KW_TIMESTAMPADD      : 'TIMESTAMPADD';
KW_TIMESTAMPDIFF     : 'TIMESTAMPDIFF';
KW_TINYINT           : 'TINYINT';
KW_TO                : 'TO';
KW_TOUCH             : 'TOUCH';
KW_TRAILING          : 'TRAILING';
KW_TRANSACTION       : 'TRANSACTION';
KW_TRANSACTIONS      : 'TRANSACTIONS';
KW_TRANSFORM         : 'TRANSFORM';
KW_TRIM              : 'TRIM';
KW_TRUE              : 'TRUE';
KW_TRUNCATE          : 'TRUNCATE';
KW_TRY_CAST          : 'TRY_CAST';
KW_TYPE              : 'TYPE';
KW_UNARCHIVE         : 'UNARCHIVE';
KW_UNBOUNDED         : 'UNBOUNDED';
KW_UNCACHE           : 'UNCACHE';
KW_UNION             : 'UNION';
KW_UNIQUE            : 'UNIQUE';
KW_UNKNOWN           : 'UNKNOWN';
KW_UNLOCK            : 'UNLOCK';
KW_UNPIVOT           : 'UNPIVOT';
KW_UNSET             : 'UNSET';
KW_UPDATE            : 'UPDATE';
KW_USE               : 'USE';
KW_USER              : 'USER';
KW_USING             : 'USING';
KW_VALUES            : 'VALUES';
KW_VARCHAR           : 'VARCHAR';
KW_VAR               : 'VAR';
KW_VARIABLE          : 'VARIABLE';
KW_VERSION           : 'VERSION';
KW_VIEW              : 'VIEW';
KW_VIEWS             : 'VIEWS';
KW_VOID              : 'VOID';
KW_WEEK              : 'WEEK';
KW_WEEKS             : 'WEEKS';
KW_WHEN              : 'WHEN';
KW_WHERE             : 'WHERE';
KW_WINDOW            : 'WINDOW';
KW_WITH              : 'WITH';
KW_WITHIN            : 'WITHIN';
KW_YEAR              : 'YEAR';
KW_YEARS             : 'YEARS';
KW_ZONE              : 'ZONE';
KW_ZORDER            : 'ZORDER';
//--SPARK-KEYWORD-LIST-END
//============================
// End of the keywords list
//============================

// Comparison operators. Some tokens accept more than one spelling:
// EQ is `=` or `==`, LTE is `<=` or `!>`, GTE is `>=` or `!<`.
// The two "not equal" spellings `<>` and `!=` are kept as separate tokens (NEQ and NEQJ).
EQ   : '=' | '==';
NSEQ : '<=>';
NEQ  : '<>';
NEQJ : '!=';
LT   : '<';
LTE  : '<=' | '!>';
GT   : '>';
GTE  : '>=' | '!<';

// Symbolic operators and delimiters.
// NOT, MINUS and PERCENT are the symbols `!`, `-` and `%`; the corresponding words are the
// separate keyword tokens KW_NOT, KW_MINUS and KW_PERCENTLIT.
// NOTE(review): HENT_START / HENT_END presumably delimit optimizer hints (`/*+ ... */`); the
// spelling "HENT" is kept as-is because the token names may be referenced by the parser grammar.
NOT         : '!';
PLUS        : '+';
MINUS       : '-';
ASTERISK    : '*';
SLASH       : '/';
PERCENT     : '%';
TILDE       : '~';
AMPERSAND   : '&';
PIPE        : '|';
CONCAT_PIPE : '||';
HAT         : '^';
COLON       : ':';
ARROW       : '->';
FAT_ARROW   : '=>';
HENT_START  : '/*+';
HENT_END    : '*/';
QUESTION    : '?';

// String literal in one of two forms:
//   1. single-quoted, where a backslash escapes the character that follows it
//      (so \' does not end the string);
//   2. raw, introduced by the letter R and delimited by single or double quotes, where
//      backslashes have no special meaning and the first matching quote ends the string.
STRING_LITERAL:
    '\'' ('\\' . | ~('\\' | '\''))* '\''
    | 'R' ('\'' (~'\'')* '\'' | '"' (~'"')* '"')
;

// Double-quoted text with the same backslash-escape handling as a single-quoted string.
DOUBLEQUOTED_STRING: '"' ('\\' . | ~('\\' | '"'))* '"';

// NOTE: If you move a numeric literal, you should modify `ParserUtils.toExprAlias()`
// which assumes all numeric literals are between `BIGINT_LITERAL` and `BIGDECIMAL_LITERAL`.

// Whole numbers followed by a one-letter suffix: L, S or Y.
BIGINT_LITERAL: DIGIT+ 'L';

SMALLINT_LITERAL: DIGIT+ 'S';

TINYINT_LITERAL: DIGIT+ 'Y';

// Plain run of digits without any suffix.
INTEGER_VALUE: DIGIT+;

// Number (with or without a decimal point) that carries an exponent part.
EXPONENT_VALUE: (DIGIT+ | DECIMAL_DIGITS) EXPONENT;

// Number with a decimal point and neither exponent nor suffix.
DECIMAL_VALUE: DECIMAL_DIGITS;

// Suffixed numbers: digits or decimal digits, an optional exponent, then F, D or BD.
FLOAT_LITERAL: (DIGIT+ | DECIMAL_DIGITS) EXPONENT? 'F';

DOUBLE_LITERAL: (DIGIT+ | DECIMAL_DIGITS) EXPONENT? 'D';

BIGDECIMAL_LITERAL: (DIGIT+ | DECIMAL_DIGITS) EXPONENT? 'BD';

// Unquoted identifier: one or more letters, digits or underscores (it may start with a digit).
// Input that is matched at the same length by an earlier rule (a keyword, INTEGER_VALUE or a
// suffixed numeric literal) is emitted as that earlier token instead.
IDENTIFIER: (LETTER | DIGIT | '_')+;

// Identifier wrapped in backticks; a backtick inside the name is written as two backticks.
BACKQUOTED_IDENTIFIER: '`' ( ~'`' | '``')* '`';

// Digits containing a decimal point, with at least one digit on one side: `1.`, `1.5` or `.5`.
fragment DECIMAL_DIGITS: DIGIT+ '.' DIGIT* | '.' DIGIT+;

// Exponent part: the letter E, an optional sign, then one or more digits.
fragment EXPONENT: 'E' [+-]? DIGIT+;

// A single ASCII decimal digit.
fragment DIGIT: [0-9];

// A single ASCII letter; lower case is accepted through the `caseInsensitive` option.
fragment LETTER: [A-Z];

// Line comment: `--` up to the end of the line, sent to the HIDDEN channel.
// A backslash immediately followed by a newline is consumed as part of the comment, so the
// comment continues on the next line. The line terminator is optional so that a comment on
// the last line of the input is still matched.
SIMPLE_COMMENT: '--' ('\\\n' | ~[\r\n])* '\r'? '\n'? -> channel(HIDDEN);

// Block comment, sent to the HIDDEN channel. The recursive reference lets comments nest.
// If the input ends before the closing delimiter is found, the embedded action calls
// markUnclosedComment() and the token ends at EOF.
// NOTE(review): this rule has no guard against input starting with `/*+`, so under longest-match
// a complete `/*+ ... */` is presumably lexed as BRACKETED_COMMENT rather than HENT_START.
// The upstream Spark grammar excludes hints here with a `{!isHint()}?` predicate — confirm
// whether dropping it is intentional.
BRACKETED_COMMENT:
    '/*' (BRACKETED_COMMENT | .)*? ('*/' | {this.markUnclosedComment();} EOF) -> channel(HIDDEN);

// Runs of spaces, tabs, carriage returns and newlines, sent to the HIDDEN channel.
WS: [ \r\n\t]+ -> channel(HIDDEN);

// Catch-all for anything we can't recognize: matches exactly one character of any kind.
// It must stay the last rule in the file, so that any other rule able to match the same
// single character takes precedence and this token is produced only as a fallback.
// We use this to be able to ignore and recover all the text
// when splitting statements with DelimiterLexer
UNRECOGNIZED: .;